{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "时序差分和策略迭代的结合体\n",
    "\n",
    "action模型根据state计算动作,value模型评估动作的价值,有点像GAN的思路\n",
    "\n",
    "使用delay模型,类似DQN的双模型的思路,缓解自举"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAR8AAAEXCAYAAACUBEAgAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/P9b71AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAdFElEQVR4nO3dfXBTdb4/8HfSNOnjSSjQhEq79C4IdHlwLVCiM3KVStHqKrB3lMtiZVm9soEB63C1u4JXr3fKD2fWlV1F53oXuLOLOOUuIBV0uwXLOsQChUp5qg+XtZWSlAeblEKTNvn8/sCea7Aiffw28H7NnBl6vt+cvNNO3j0536QYRERARNTPjKoDENGNieVDREqwfIhICZYPESnB8iEiJVg+RKQEy4eIlGD5EJESLB8iUoLlQ0RKKCufV199FSNGjEBcXBxycnKwb98+VVGISAEl5fP222+jsLAQzz33HA4ePIiJEyciLy8PjY2NKuIQkQIGFR8szcnJweTJk/H73/8eABAOh5Geno4lS5bgmWee+d7bh8NhNDQ0IDk5GQaDoa/jEtE1EhE0NzcjLS0NRuPVz21M/ZRJFwwGUVVVhaKiIn2f0WhEbm4u3G53p7cJBAIIBAL616dOnUJWVlafZyWi7qmvr8fw4cOvOqffy+fs2bMIhUKw2+0R++12O06cONHpbYqLi/H8889/a399fT00TeuTnETUdX6/H+np6UhOTv7euf1ePt1RVFSEwsJC/euOB6hpGsuHaAC6lssh/V4+Q4YMQUxMDLxeb8R+r9cLh8PR6W0sFgssFkt/xCOiftLvq11msxnZ2dkoLy/X94XDYZSXl8PpdPZ3HCJSRMnLrsLCQhQUFGDSpEmYMmUKfvvb36KlpQULFixQEYeIFFBSPg899BDOnDmDlStXwuPx4JZbbsF77733rYvQRHT9UvI+n57y+/2wWq3w+Xy84Ew0gHTlucnPdhGREiwfIlKC5UNESrB8iEgJlg8RKcHyISIlWD5EpATLh4iUYPkQkRIsHyJSguVDREqwfIhICZYPESnB8iEiJVg+RKQEy4eIlGD5EJESLB8iUoLlQ0RKsHyISAmWDxEpwfIhIiVYPkSkBMuHiJRg+RCREiwfIlKC5UNESrB8iEgJlg8RKcHyISIlWD5EpATLh4iUYPkQkRIsHyJSguVDREp0uXz27NmD+++/H2lpaTAYDNi6dWvEuIhg5cqVGDZsGOLj45Gbm4tPP/00Ys758+cxb948aJoGm82GhQsX4sKFCz16IEQUXbpcPi0tLZg4cSJeffXVTsdXr16NNWvW4PXXX0dlZSUSExORl5eH1tZWfc68efNw9OhRlJWVobS0FHv27MHjjz/e/UdBRNFHegCAbNmyRf86HA6Lw+GQl156Sd/X1NQkFotF3nrrLREROXbsmACQ/fv363N27twpBoNBTp06dU336/P5BID4fL6exCeiXtaV52avXvM5efIkPB4PcnNz9X1WqxU5OTlwu90AALfbDZvNhkmTJulzcnNzYTQaUVlZ2elxA4EA/H5/xEZE0a1Xy8fj8QAA7HZ7xH673a6PeTwepKamRoybTCakpKToc65UXFwMq9Wqb+np6b0Zm4gUiIrVrqKiIvh8Pn2rr69XHYmIeqhXy8fhcAAAvF5vxH6v16uPORwONDY2Roy3t7fj/Pnz+pwrWSwWaJoWsRFRdOvV8snMzITD4UB5ebm+z+/3o7KyEk6nEwDgdDrR1NSEqqoqfc6uXbsQDoeRk5PTm3GIaAAzdfUGFy5cwGeffaZ/ffLkSVRXVyMlJQUZGRlYtmwZXnzxRYwaNQqZmZlYsWIF0tLS8OCDDwIAxo4di5kzZ+Kxxx7D66+/jra2NixevBgPP/ww0tLSeu2BEdEA19WltN27dwuAb20FBQUicnm5fcWKFWK328Viscj06dOltrY24hjnzp2TuXPnSlJSkmiaJgsWLJDm5uZrzsCldqKBqSvPTYOIiMLu6xa/3w+r1Qqfz8frP0QDSFeem1Gx2kVE1x+WDxEpwfIhIiVYPkSkBMuHiJRg+RCREiwfIlKC5UNESrB8iEgJlg8RKcHyISIlWD5EpATLh4iUYPkQkRIsHyJSguVDREqwfIhICZYPESnB8iEiJVg+RKQEy4eIlGD5EJESLB8iUoLlQ0RKsHyISAmWDxEpYVIdgGggk1AIbV99hXa/H76qKgS8XsQkJCB53DgkjxuHmIQE1RGjFsuHqBMiglBLCxpLS3Hur39F6NIlhJqb9fEzpaVIvuUWpP/iF7AMGwaDwaAwbXRi+RBdQcJhtHz6KRr++Ec0f/xx53Pa2+E/cABfXLyIzH/9V5hTUvo5ZfTjNR+ir4kIwoEAGt95B5//x398Z/F804Vjx9D4zjuQcLgfEl5fWD5EuFw8AY8Hf3/lFXy5fj3am5qu+bbnKyoQamnpu3DXKb7sohteOBhEU2UlTv33fyPo9Xbr9jzz6TqWD92wRATBM2fgKSnBufJySHu76kg3FJYP3XBEBBBB8+HDqP/P/0RrfX2Pjpc4ejRi4uN7Kd2Ng+VDN5SOJXTvtm04U1ra42s1Jk2DY9YsGGJjeynhjaNLF5yLi4sxefJkJCcnIzU1FQ8++CBqa2sj5rS2tsLlcmHw4MFISkrCnDlz4L3idXRdXR3y8/ORkJCA1NRULF++HO085aU+JuEwLn72GT7793+Hp6SkV4pn+MKFSPrRj/g+n27oUvlUVFTA5XLho48+QllZGdra2jBjxgy0fOOH+OSTT2L79u0oKSlBRUUFGhoaMHv2bH08FAohPz8fwWAQe/fuxYYNG7B+/XqsXLmy9x4V0RVCly6hcft2fPpv/4aW48eBnlwgNhqRmJWFf3j6aaTccQcMRi4ad4dBRKS7Nz5z5gxSU1NRUVGBO+64Az6fD0OHDsXGjRvx05/+FABw4sQJjB07Fm63G1OnTsXOnTtx3333oaGhAXa7HQDw+uuv4+mnn8aZM2dgNpu/9379fj+sVit8Ph80TetufLoBiAhav/wSDX/6E3z79vX4orJJ0zD0vvuQet99iElM5BnPFbry3OxRZft8PgBAytfv7qyqqkJbWxtyc3P1OWPGjEFGRgbcbjcAwO12Y/z48XrxAEBeXh78fj+OHj3a6f0EAgH4/f6Ijej7hNvacH73bnz+4oto2ru3x8WTOHYs/uHppzHsn/4JpqQkFk8PdfuCczgcxrJly3D77bdj3LhxAACPxwOz2QybzRYx1263w+Px6HO+WTwd4x1jnSkuLsbzzz/f3ah0gxERtH31FU6tX4+v9u6FBIM9Op7RYkHKtGlImz8fJk1j6fSSbpePy+XCkSNH8OGHH/Zmnk4VFRWhsLBQ/9rv9yM9Pb3P75eij4RC8B8+jFMbNuDS//5vj49nSUvDTY88AtuUKTCYuDjcm7r13Vy8eDFKS0uxZ88eDB8+XN/vcDgQDAbR1NQUcfbj9XrhcDj0Ofv27Ys4XsdqWMecK1ksFlgslu5EpRuEiCDU3Azvtm1ofPddhC9e7NHxDLGxsN12G2762c9gTk3l2U4f6NI1HxHB4sWLsWXLFuzatQuZmZkR49nZ2YiNjUV5ebm+r7a2FnV1dXA6nQAAp9OJmpoaNDY26nPKysqgaRqysrJ68ljoBiXhMFqOH8fnq1bBs3lzj4vHpGnI+Jd/wYjFi2Gx21k8faRLZz4ulwsbN27Etm3bkJycrF+jsVqtiI+Ph9VqxcKFC1FYWIiUlBRomoYlS5bA6XRi6tSpAIAZM2YgKysL8+fPx+rVq+HxePDss8/C5XLx7Ia6LNTaisZt29BYWor2rxdAussQE4OkceMwfMECxI8YwSX0Ptalpfbv+g2wbt06PProowAuv8nwqaeewltvvYVAIIC8vDy89tprES+pvvjiCyxatAgffPABEhMTUVBQgFWrVsF0ja+pudROIoLW+no0/PGPaKqsBLr/jhEAQExyMhyzZmHoPffAmJDAs51u6spzs0fv81GF5XPjEhFIezvO79mDhj/9CW1nz/b4mAk//CHSH38ciaNH82ynh7ry3OTle4oaIoKg14vTX38KvUfvUsbli8pD770X9lmzEDtoEM92+hnLh6KChELwf/wxvnzzTbR++WWPj2e56abLS+iTJ3MJXRF+12lAExG0+/3wbt2KMzt2IHzpUs8OGBODwf/4jxj20EMwcyVLKZYPDVgSDqPlxAnU/9d/4eLnn/f4ZZZ56FDYZ83CkBkzYLyGzxBS32L50IAUunQJZ3buxOm33+752Y7RCOukSUj7539GfGYmz3YGCJYPDSgdn0I/tX49/AcPQkKhHh3PpGmwz559eQk9Lo7FM4CwfGjACAeDl5fQN27slSX0pKws3PToo0i8+WYuoQ9ALB9STkTQdvYsvly3Dk2VlZC2th4dzxgfj8F33YW0uXNh4vvABiyWDykVbm+Hv6oKDRs34tLJkz0+XvyIEUibNw/W7GwuoQ9w/OmQEh1L6J7Nm3G2rKznn0I3m5EybdrlJfShQ3ltJwqwfKjfiQguHD+OU+vWoeWTT3r8uazYlBSk/exnSLnjDi6hRxGWD/UrEUGgoQFfrFmDQENDj45liIlB8sSJGL5wIeKGD+fZTpRh+VC/Cl28iLo33kCgoQEigqZgEJ83N8NqNuOHyckwXmOBmGw2OGbPxpCZM2G0WFg8UYjlQ/3Kt38/mg8fhoigrqUFKw4dQq3Ph0STCb+4+WY8lJmJmKsVicGAhJEjL38K/eabWTpRjOVD/erCsWNAOAwB8P9qanCsqQkA4G9rw++PH8ePbDZM/Pp/Q7mS0WLB0Px82B94ACabjcUT5Vg+pIz/ivfzBMNhBDp7R7PBgLj0dAxfsADaxIlcQr9O8KdI/SomPh4AYABwp8OBz/x+tH+92nWzpuEHSUkR8w0xMRicm4thDz+M2JQUnu1cR1g+1K8G33knzpaVIXThAgpGjkRybCz+evo0hsXH47Gbb0ZqXJw+1zx0KIY9/DBS7rwThpgYFs91hn9GlfqVhMNofPddnNqwARIMXv6zqLh8JgR8/XfCDQbYcnKQNm8e4jIyWDpRhH9GlQYsg9GIoTNnAgC8//M/aPvqKxi+/v1niI2FKTkZ9tmzMeTuu/WXaHR9YvlQvzPGxiI1Px/WW2+F/9AhBDweGOPjkTR2LOIzMi5f2+Gn0K97LB9SwmA0Iu6mmxB3002qo5Ai/PVCREqwfIhICZYPESnB8iEiJVg+RKQEy4eIlGD5EJESLB8iUoLlQ0RKsHyISAmWDxEpwfIhIiVYPkSkBMuHiJToUvmsXbsWEyZMgKZp0DQNTqcTO3fu1MdbW1vhcrkwePBgJCUlYc6cOfB6vRHHqKurQ35+PhISEpCamorly5ejvb29dx4NEUWNLpXP8OHDsWrVKlRVVeHAgQO466678MADD+Do0aMAgCeffBLbt29HSUkJKioq0NDQgNmzZ+u3D4VCyM/PRzAYxN69e7FhwwasX78eK1eu7N1HRUQDn/TQoEGD5M0335SmpiaJjY2VkpISfez48eMCQNxut4iI7NixQ4xGo3g8Hn3O2rVrRdM0CQQC33kfra2t4vP59K2+vl4AiM/n62l8IupFPp/vmp+b3b7mEwqFsGnTJrS0tMDpdKKqqgptbW3Izc3V54wZMwYZGRlwu90AALfbjfHjx8Nut+tz8vLy4Pf79bOnzhQXF8Nqtepbenp6d2MT0QDR5fKpqalBUlISLBYLnnjiCWzZsgVZWVnweDwwm82w2WwR8+12OzweDwDA4/FEFE/HeMfYdykqKoLP59O3+vr6rsYmogGmy3/DefTo0aiurobP58PmzZtRUFCAioqKvsims1gssFgsfXofRNS/ulw+ZrMZI0eOBABkZ2dj//79eOWVV/DQQw8hGAyiqakp4uzH6/XC4XAAABwOB/bt2xdxvI7VsI45RHRj6PH7fMLhMAKBALKzsxEbG4vy8nJ9rLa2FnV1dXA6nQAAp9OJmpoaNDY26nPKysqgaRqysrJ6GoWIokiXznyKiopwzz33ICMjA83Nzdi4cSM++OADvP/++7BarVi4cCEKCwuRkpICTdOwZMkSOJ1OTJ06FQAwY8YMZGVlYf78+Vi9ejU8Hg+effZZuFwuvqwiusF0qXwaGxvxyCOP4PTp07BarZgwYQLef/993H333QCAl19+GUajEXPmzEEgEEBeXh5ee+01/fYxMTEoLS3FokWL4HQ6kZiYiIKCArzwwgu9+6iIaMDj/9VORL2mK89NfraLiJRg+RCREiwfIlKC5UNESrB8iEgJlg8RKcHyISIlWD5EpATLh4iUYPkQkRIsHyJSguVDREqwfIhICZYPESnB8iEiJVg+RKQEy4eIlGD5EJESLB8iUoLlQ0RKsHyISAmWDxEpwfIhIiVYPkSkBMuHiJRg+RCREiwfIlKC5UNESrB8iEgJlg8RKcHyISIlWD5EpATLh4iUYPkQkRIsHyJSokfls2rVKhgMBixbtkzf19raCpfLhcGDByMpKQlz5syB1+uNuF1dXR3y8/ORkJCA1NRULF++HO3t7T2JQkRRptvls3//frzxxhuYMGFCxP4nn3wS27dvR0lJCSoqKtDQ0IDZs2fr46FQCPn5+QgGg9i7dy82bNiA9evXY+XKld1/FEQUfaQbmpubZdSoUVJWVibTpk2TpUuXiohIU1OTxMbGSklJiT73+PHjAkDcbreIiOzYsUOMRqN4PB59ztq1a0XTNAkEAp3eX2trq/h8Pn2rr68XAOLz+boTn4j6iM/nu+bnZrfOfFwuF/Lz85Gbmxuxv6qqCm1tbRH7x4wZg4yMDLjdbgCA2+3G+PHjYbfb9Tl5eXnw+/04evRop/dXXFwMq9Wqb+np6d2JTUQDSJfLZ9OmTTh48CCKi4u/NebxeGA2m2Gz2SL22+12eDwefc43i6djvGOsM0VFRfD5fPpWX1/f1dhENMCYujK5vr4eS5cuRVlZGeLi4voq07dYLBZYLJZ+uz8i6ntdOvOpqqpCY2Mjbr31VphMJphMJlRUVGDNmjUwmUyw2+0IBoNoamqKuJ3X64XD4QAAOByOb61+dXzdMYeIrn9dKp/p06ejpqYG1dXV+jZp0iTMmzdP/3dsbCzKy8v129TW1qKurg5OpxMA4HQ6UVNTg8bGRn1OWVkZNE1DVlZWLz0sIhrouvSyKzk5GePGjYvYl5iYiMGDB+v7Fy5ciMLCQqSkpEDTNCxZsgROpxNTp04FAMyYMQNZWVmYP38+Vq9eDY/Hg2effRYul4svrYhuIF0qn2vx8ssvw2g0Ys6cOQgEAsjLy8Nrr72mj8fExKC0tBSLFi2C0+lEYmIiCgoK8MILL/R2FCIawAwiIqpDdJXf74fVaoXP54OmaarjENHXuvLc5Ge7iEgJlg8RKcHyISIlWD5EpATLh4iUYPkQkRIsHyJSguVDREqwfIhICZYPESnB8iEiJVg+RKQEy4eIlGD5EJESLB8iUoLlQ0RKsHyISAmWDxEpwfIhIiVYPkSkBMuHiJRg+RCREiwfIlKC5UNESrB8iEgJlg8RKcHyISIlWD5EpATLh4iUYPkQkRIsHyJSguVDREqwfIhICZYPESnB8iEiJVg+RKQEy4eIlDCpDtAdIgIA8Pv9ipMQ0Td1PCc7nqNXE5Xlc+7cOQBAenq64iRE1Jnm5mZYrdarzonK8klJSQEA1NXVfe8DHGj8fj/S09NRX18PTdNUx7lmzN2/ojW3iKC5uRlpaWnfOzcqy8dovHypymq1RtUP5ps0TYvK7Mzdv6Ix97WeEPCCMxEpwfIhIiWisnwsFguee+45WCwW1VG6LFqzM3f/itbcXWGQa1kTIyLqZVF55kNE0Y/lQ0RKsHyISAmWDxEpwfIhIiWisnxeffVVjBgxAnFxccjJycG+ffuU5tmzZw/uv/9+pKWlwWAwYOvWrRHjIoKVK1di2LBhiI+PR25uLj799NOIOefPn8e8efOgaRpsNhsWLlyICxcu9Gnu4uJiTJ48GcnJyUhNTcWDDz6I2traiDmtra1wuVwYPHgwkpKSMGfOHHi93og5dXV1yM/PR0JCAlJTU7F8+XK0t7f3We61a9diwoQJ+rt/nU4ndu7cOaAzd2bVqlUwGAxYtmxZ1GXvFRJlNm3aJGazWf7whz/I0aNH5bHHHhObzSZer1dZph07dsivf/1r+fOf/ywAZMuWLRHjq1atEqvVKlu3bpWPP/5YfvKTn0hmZqZcunRJnzNz5kyZOHGifPTRR/K3v/1NRo4cKXPnzu3T3Hl5ebJu3To5cuSIVFdXy7333isZGRly4cIFfc4TTzwh6enpUl5eLgcOHJCpU6fKbbfdpo+3t7fLuHHjJDc3Vw4dOiQ7duyQIUOGSFFRUZ/lfuedd+Tdd9+VTz75RGpra+VXv/qVxMbGypEjRwZs5ivt27dPRowYIRMmTJClS5fq+6Mhe2+JuvKZMmWKuFwu/etQKCRpaWlSXFysMNX/ubJ8wuGwOBwOeemll/R9TU1NYrFY5K233hIRkWPHjgkA2b9/vz5n586dYjAY5NSpU/2WvbGxUQBIRUWFnjM2NlZKSkr0OcePHxcA4na7ReRy8RqNRvF4PPqctWvXiqZpEggE+i37oEGD5M0334yKzM3NzTJq1CgpKyuTadOm6eUTDdl7U1S97AoGg6iqqkJubq6+z2g0Ijc3F263W2Gy73by5El4PJ6IzFarFTk5OXpmt9sNm82GSZMm6XNyc3NhNBpRWVnZb1l9Ph+A//urAVVVVWhra4vIPmbMGGRkZERkHz9+POx2uz4nLy8Pfr8fR48e7fPMoVAImzZtQktLC5xOZ1RkdrlcyM/Pj8gIRMf3uzdF1afaz549i1AoFPGNBwC73Y4TJ04oSnV1Ho8HADrN3DHm8XiQmpoaMW4ymZCSkqLP6WvhcBjLli3D7bffjnHjxum5zGYzbDbbVbN39tg6xvpKTU0NnE4nWltbkZSUhC1btiArKwvV1dUDNjMAbNq0CQcPHsT+/fu/NTaQv999IarKh/qOy+XCkSNH8OGHH6qOck1Gjx6N6upq+Hw+bN68GQUFBaioqFAd66rq6+uxdOlSlJWVIS4uTnUc5aLqZdeQIUMQExPzrav/Xq8XDodDUaqr68h1tcwOhwONjY0R4+3t7Th//ny/PK7FixejtLQUu3fvxvDhw/X9DocDwWAQTU1NV83e2WPrGOsrZrMZI0eORHZ2NoqLizFx4kS88sorAzpzVVUVGhsbceutt8JkMsFkMqGiogJr1qyByWSC3W4fsNn7QlSVj9lsRnZ2NsrLy/V94XAY5eXlcDqdCpN9t8zMTDgcjojMfr8flZWVeman04mmpiZUVVXpc3bt2oVwOIycnJw+yyYiWLx4MbZs2YJdu3YhMzMzYjw7OxuxsbER2Wtra1FXVxeRvaamJqI8y8rKoGkasrKy+iz7lcLhMAKBwIDOPH36dNTU1KC6ulrfJk2ahHnz5un/HqjZ+4TqK95dtWnTJrFYLLJ+/Xo5duyYPP7442Kz2SKu/ve35uZmOXTokBw6dEgAyG9+8xs5dOiQfPHFFyJyeandZrPJtm3b5PDhw/LAAw90utT+4x//WCorK+XDDz+UUaNG9flS+6JFi8RqtcoHH3wgp0+f1reLFy/qc5544gnJyMiQXbt2yYEDB8TpdIrT6dTHO5Z+Z8yYIdXV1fLee+/J0KFD+3Tp95lnnpGKigo5efKkHD58WJ555hkxGAzyl7/8ZcBm/i7fXO2Ktuw9FXXlIyLyu9/9TjIyMsRsNsuUKVPko48+Uppn9+7dAuBbW0FBgYhcXm5fsWKF2O12sVgsMn36dKmtrY04xrlz52Tu3LmSlJQkmqbJggULpLm5uU9zd5YZgKxbt06fc+nSJfnlL38pgwYNkoSEBJk1a5acPn064jh///vf5Z577pH4+HgZMmSIPPXUU9LW1tZnuX/+85/LD37wAzGbzTJ06FCZPn26XjwDNfN3ubJ8oil7T/Hv+RCRElF1zYeIrh8sHyJSguVDREqwfIhICZYPESnB8iEiJVg+RKQEy4eIlGD5EJESLB8iUoLlQ0RK/H+eeyhxnIIArgAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 300x300 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import gym\n",
    "\n",
    "\n",
    "#定义环境\n",
    "class MyWrapper(gym.Wrapper):\n",
    "\n",
    "    def __init__(self):\n",
    "        env = gym.make('Pendulum-v1', render_mode='rgb_array')\n",
    "        super().__init__(env)\n",
    "        self.env = env\n",
    "        self.step_n = 0\n",
    "\n",
    "    def reset(self):\n",
    "        state, _ = self.env.reset()\n",
    "        self.step_n = 0\n",
    "        return state\n",
    "\n",
    "    def step(self, action):\n",
    "        state, reward, terminated, truncated, info = self.env.step(\n",
    "            [action * 2])\n",
    "        over = terminated or truncated\n",
    "\n",
    "        #偏移reward,便于训练\n",
    "        reward = (reward + 8) / 8\n",
    "\n",
    "        #限制最大步数\n",
    "        self.step_n += 1\n",
    "        if self.step_n >= 200:\n",
    "            over = True\n",
    "\n",
    "        return state, reward, over\n",
    "\n",
    "    #打印游戏图像\n",
    "    def show(self):\n",
    "        from matplotlib import pyplot as plt\n",
    "        plt.figure(figsize=(3, 3))\n",
    "        plt.imshow(self.env.render())\n",
    "        plt.show()\n",
    "\n",
    "\n",
    "env = MyWrapper()\n",
    "\n",
    "env.reset()\n",
    "\n",
    "env.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[-0.0432],\n",
       "        [-0.2554]], grad_fn=<TanhBackward0>)"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import torch\n",
    "\n",
    "\n",
    "class Model(torch.nn.Module):\n",
    "\n",
    "    def __init__(self):\n",
    "        super().__init__()\n",
    "        self.s = torch.nn.Sequential(\n",
    "            torch.nn.Linear(3, 64),\n",
    "            torch.nn.ReLU(),\n",
    "            torch.nn.Linear(64, 64),\n",
    "            torch.nn.ReLU(),\n",
    "            torch.nn.Linear(64, 1),\n",
    "            torch.nn.Tanh(),\n",
    "        )\n",
    "\n",
    "    def forward(self, state):\n",
    "        return self.s(state)\n",
    "\n",
    "\n",
    "model_action = Model()\n",
    "model_action_delay = Model()\n",
    "model_action_delay.load_state_dict(model_action.state_dict())\n",
    "\n",
    "model_action(torch.randn(2, 3))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[-0.0708],\n",
       "        [-0.0770]], grad_fn=<AddmmBackward0>)"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_value = torch.nn.Sequential(\n",
    "    torch.nn.Linear(4, 64),\n",
    "    torch.nn.ReLU(),\n",
    "    torch.nn.Linear(64, 64),\n",
    "    torch.nn.ReLU(),\n",
    "    torch.nn.Linear(64, 1),\n",
    ")\n",
    "\n",
    "model_value_delay = torch.nn.Sequential(\n",
    "    torch.nn.Linear(4, 64),\n",
    "    torch.nn.ReLU(),\n",
    "    torch.nn.Linear(64, 64),\n",
    "    torch.nn.ReLU(),\n",
    "    torch.nn.Linear(64, 1),\n",
    ")\n",
    "\n",
    "model_value_delay.load_state_dict(model_value.state_dict())\n",
    "\n",
    "model_value(torch.randn(2, 4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "32.72024895910162"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from IPython import display\n",
    "import random\n",
    "\n",
    "\n",
    "#玩一局游戏并记录数据\n",
    "def play(show=False):\n",
    "    data = []\n",
    "    reward_sum = 0\n",
    "\n",
    "    state = env.reset()\n",
    "    over = False\n",
    "    while not over:\n",
    "        action = model_action(torch.FloatTensor(state).reshape(1, 3)).item()\n",
    "\n",
    "        #给动作添加噪声,增加探索\n",
    "        action += random.normalvariate(mu=0, sigma=0.2)\n",
    "\n",
    "        next_state, reward, over = env.step(action)\n",
    "\n",
    "        data.append((state, action, reward, next_state, over))\n",
    "        reward_sum += reward\n",
    "\n",
    "        state = next_state\n",
    "\n",
    "        if show:\n",
    "            display.clear_output(wait=True)\n",
    "            env.show()\n",
    "\n",
    "    return data, reward_sum\n",
    "\n",
    "\n",
    "play()[-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_9466/1738991660.py:27: UserWarning: Creating a tensor from a list of numpy.ndarrays is extremely slow. Please consider converting the list to a single numpy.ndarray with numpy.array() before converting to a tensor. (Triggered internally at  ../torch/csrc/utils/tensor_new.cpp:201.)\n",
      "  state = torch.FloatTensor([i[0] for i in data]).reshape(-1, 3)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "(torch.Size([64, 3]),\n",
       " 200,\n",
       " (array([-0.45102915, -0.8925092 ,  0.04206204], dtype=float32),\n",
       "  -0.04745164839083892,\n",
       "  0.4804321853483189,\n",
       "  array([-0.4794219 , -0.8775845 , -0.64155537], dtype=float32),\n",
       "  False))"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#数据池\n",
    "class Pool:\n",
    "\n",
    "    def __init__(self):\n",
    "        self.pool = []\n",
    "\n",
    "    def __len__(self):\n",
    "        return len(self.pool)\n",
    "\n",
    "    def __getitem__(self, i):\n",
    "        return self.pool[i]\n",
    "\n",
    "    #更新动作池\n",
    "    def update(self):\n",
    "        #每次更新不少于N条新数据\n",
    "        old_len = len(self.pool)\n",
    "        while len(pool) - old_len < 200:\n",
    "            self.pool.extend(play()[0])\n",
    "\n",
    "        #只保留最新的N条数据\n",
    "        self.pool = self.pool[-2_0000:]\n",
    "\n",
    "    #获取一批数据样本\n",
    "    def sample(self):\n",
    "        data = random.sample(self.pool, 64)\n",
    "\n",
    "        state = torch.FloatTensor([i[0] for i in data]).reshape(-1, 3)\n",
    "        action = torch.FloatTensor([i[1] for i in data]).reshape(-1, 1)\n",
    "        reward = torch.FloatTensor([i[2] for i in data]).reshape(-1, 1)\n",
    "        next_state = torch.FloatTensor([i[3] for i in data]).reshape(-1, 3)\n",
    "        over = torch.LongTensor([i[4] for i in data]).reshape(-1, 1)\n",
    "\n",
    "        return state, action, reward, next_state, over\n",
    "\n",
    "\n",
    "pool = Pool()\n",
    "pool.update()\n",
    "state, action, reward, next_state, over = pool.sample()\n",
    "\n",
    "next_state.shape, len(pool), pool[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "optimizer_action = torch.optim.Adam(model_action.parameters(), lr=5e-4)\n",
    "optimizer_value = torch.optim.Adam(model_value.parameters(), lr=5e-3)\n",
    "\n",
    "\n",
    "def soft_update(_from, _to):\n",
    "    for _from, _to in zip(_from.parameters(), _to.parameters()):\n",
    "        value = _to.data * 0.7 + _from.data * 0.3\n",
    "        _to.data.copy_(value)\n",
    "\n",
    "\n",
    "def requires_grad(model, value):\n",
    "    for param in model.parameters():\n",
    "        param.requires_grad_(value)\n",
    "\n",
    "\n",
    "requires_grad(model_action_delay, False)\n",
    "requires_grad(model_value_delay, False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.2836421728134155"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def train_action(state):\n",
    "    requires_grad(model_action, True)\n",
    "    requires_grad(model_value, False)\n",
    "\n",
    "    #首先把动作计算出来\n",
    "    action = model_action(state)\n",
    "\n",
    "    #使用value网络评估动作的价值,价值是越高越好\n",
    "    input = torch.cat([state, action], dim=1)\n",
    "    loss = -model_value(input).mean()\n",
    "\n",
    "    loss.backward()\n",
    "    optimizer_action.step()\n",
    "    optimizer_action.zero_grad()\n",
    "\n",
    "    return loss.item()\n",
    "\n",
    "\n",
    "train_action(state)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.28993305563926697"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def train_value(state, action, reward, next_state, over):\n",
    "    requires_grad(model_action, False)\n",
    "    requires_grad(model_value, True)\n",
    "\n",
    "    #计算value\n",
    "    input = torch.cat([state, action], dim=1)\n",
    "    value = model_value(input)\n",
    "\n",
    "    #计算target\n",
    "    with torch.no_grad():\n",
    "        next_action = model_action_delay(next_state)\n",
    "        input = torch.cat([next_state, next_action], dim=1)\n",
    "        target = model_value_delay(input)\n",
    "    target = target * 0.99 * (1 - over) + reward\n",
    "\n",
    "    #计算td loss,更新参数\n",
    "    loss = torch.nn.functional.mse_loss(value, target)\n",
    "\n",
    "    loss.backward()\n",
    "    optimizer_value.step()\n",
    "    optimizer_value.zero_grad()\n",
    "\n",
    "    return loss.item()\n",
    "\n",
    "\n",
    "train_value(state, action, reward, next_state, over)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0 400 13.689460329425094\n",
      "20 4400 61.277334086234745\n",
      "40 8400 148.42843627970188\n",
      "60 12400 180.47028353752262\n",
      "80 16400 176.3395203401841\n",
      "100 20000 181.12751784525307\n",
      "120 20000 180.88091179850485\n",
      "140 20000 143.26279595829368\n",
      "160 20000 179.55250829150913\n",
      "180 20000 175.80709007378786\n"
     ]
    }
   ],
   "source": [
    "#训练\n",
    "def train():\n",
    "    model_action.train()\n",
    "    model_value.train()\n",
    "\n",
    "    #共更新N轮数据\n",
    "    for epoch in range(200):\n",
    "        pool.update()\n",
    "\n",
    "        #每次更新数据后,训练N次\n",
    "        for i in range(200):\n",
    "\n",
    "            #采样N条数据\n",
    "            state, action, reward, next_state, over = pool.sample()\n",
    "\n",
    "            #训练模型\n",
    "            train_action(state)\n",
    "            train_value(state, action, reward, next_state, over)\n",
    "\n",
    "        soft_update(model_action, model_action_delay)\n",
    "        soft_update(model_value, model_value_delay)\n",
    "\n",
    "        if epoch % 20 == 0:\n",
    "            test_result = sum([play()[-1] for _ in range(20)]) / 20\n",
    "            print(epoch, len(pool), test_result)\n",
    "\n",
    "\n",
    "train()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAR8AAAEXCAYAAACUBEAgAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/P9b71AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAlSUlEQVR4nO3dfXRTdZ4/8HfSpOljUkrbBKSFKih0eC4PjeDoDJHy4PiEv+MwLFOUdZApDgw7nrEM4uJxrItzxlFHQI9HZc+uwy4uKIMU6bZYFMJToVAKFNRCKyUtBZq0pU3b3M/vD2mWyFPSp5s479c5OYfc+733fm5L3v3mfu+DRkQERES9TKt2AUT0j4nhQ0SqYPgQkSoYPkSkCoYPEamC4UNEqmD4EJEqGD5EpAqGDxGpguFDRKpQLXzeeustDBo0CBEREZg4cSL27dunVilEpAJVwue//uu/sHTpUrzwwgs4ePAgRo0ahczMTNTW1qpRDhGpQKPGhaUTJ07E+PHj8de//hUAoCgKkpOT8cwzz+C555675fKKoqC6uhqxsbHQaDQ9XS4R+UlE0NDQgP79+0OrvXnfRtdLNXm1traiuLgYOTk53mlarRY2mw12u/26y7jdbrjdbu/7s2fPIi0trcdrJaLOqaqqwoABA27aptfDp66uDh6PB2az2We62WzGiRMnrrtMbm4uVq5cec30qqoqGI3GHqmTiALncrmQnJyM2NjYW7bt9fDpjJycHCxdutT7vmMHjUYjw4coCPlzOKTXwychIQFhYWGoqanxmV5TUwOLxXLdZQwGAwwGQ2+UR0S9pNdHu8LDw5Geno6CggLvNEVRUFBQAKvV2tvlEJFKVPnatXTpUmRlZWHcuHGYMGEC/vKXv6CpqQlPPPGEGuUQkQpUCZ/HH38c58+fx4oVK+BwODB69Ghs27btmoPQRPTDpcp5Pl3lcrlgMpngdDp5wJkoiATy2eS1XUSkCoYPEamC4UNEqmD4EJEqGD5EpAqGDxGpguFDRKpg+BCRKhg+RKQKhg8RqYLhQ0SqYPgQkSoYPkSkCoYPEamC4UNEqmD4EJEqGD5EpAqGDxGpguFDRKpg+BCRKhg+RKQKhg8RqYLhQ0SqYPgQkSoYPkSkCoYPEamC4UNEqmD4EJEqGD5EpAqGDxGpguFDRKpg+BCRKhg+RKQKhg8RqYLhQ0SqCDh8du7ciZ/97Gfo378/NBoNPv74Y5/5IoIVK1agX79+iIyMhM1mw6lTp3zaXLx4EXPmzIHRaERcXBzmz5+PxsbGLu0IEYWWgMOnqakJo0aNwltvvXXd+atWrcIbb7yBtWvXYu/evYiOjkZmZiZaWlq8bebMmYOysjLk5+djy5Yt2LlzJ371q191fi+IKPRIFwCQTZs2ed8riiIWi0VeffVV77T6+noxGAzyt7/9TUREjh07JgBk//793jZ5eXmi0Wjk7Nmzfm3X6XQKAHE6nV0pn4i6WSCfzW495lNRUQGHwwGbzeadZjKZMHHiRNjtdgCA3W5HXFwcxo0b521js9mg1Wqxd+/e667X7XbD5XL5vIgotHVr+DgcDgCA2Wz2mW42m73zHA4HkpKSfObrdDrEx8d723xfbm4uTCaT95WcnNydZRORCkJitCsnJwdOp9P7qqqqUrskIuqibg0fi8UCAKipqfGZXlNT451nsVhQW1vrM7+9vR0XL170tvk+g8EAo9Ho8yKi0Nat4ZOamgqLxYKCggLvNJfLhb1798JqtQIArFYr6uvrUVxc7G1TWFgIRVEwceLE7iyHiIKYLtAFGhsb8dVXX3nfV1RUoKSkBPHx8UhJScGSJUvw0ksvYciQIUhNTcXzzz+P/v374+GHHwYADBs2DNOmTcNTTz2FtWvXoq2tDYsWLcLPf/5z9O/fv9t2jIiCXKBDaTt27BAA17yysrJE5Lvh9ueff17MZrMYDAaZMmWKlJeX+6zjwoULMnv2bImJiRGj0ShPPPGENDQ0+F0Dh9qJglMgn02NiIiK2dcpLpcLJpMJTqeTx3+Igkggn82QGO0ioh8ehg8RqYLhQ0SqYPgQkSoYPkSkCoYPEamC4UNEqgj4DGeiYCUi8DQ1wXP5MiCCsKgohMXEQKPRqF0aXQfDh34QPM3NuLBjBy4UFsJ99ixEUWCwWBD/4x8jYepU6GJj1S6RvofhQyFNRKA0N+PbDz7Ahfx8iMfjnddcUYGzZ86g6dQppCxcCJ3RyF5QEOExHwpp7U4nqj/8EHXfCx4vRUG93Y5z69cDitL7BdINMXwoZIkIaj75BLWbNwPXC57/a4iLRUW4XFHRe8XRLTF8KGS1nj+PSzt3+tXW09iIFt4BM6gwfChkXfriC7SeP692GdRJDB8KSZ7mZjj37/d/gbAwaCMieq4gChjDh0KOiKCxrAxN5eV+LxM5cCCMo0b1YFUUKIYPhR5FwcUvvrj+6Nb1hIUhacYMaCMje7YuCgjDh0KOu6YGroMH/W4fcdttiLNaeY5PkGH4UEgRRcGFHTvQ7nT6vUzchAkIi4npwaqoMxg+FFI8TU24uGOH3+3DYmIQf9997PUEIYYPhQwRgevQIbRdvOj3MsbRoxHBRzIFJYYPhQxpb8f5bdsg7e3+LaDVos8990Cj4yWMwYjhQyFBRNB85gyaT5/2e5nowYNhHDmy54qiLmH4UMi4UFAAT2Oj3+373n8/wqKje7Ai6gqGD4UE97lzuLR7t9/t9X36sNcT5Bg+FBIajhxB+6VLfrePmzwZ4RZLD1ZEXcXwoaDnaWpC3fbtfrfX6PXoY7X2YEXUHRg+FPRchw/j8tdf+90+Ji0N0UOH8tyeIMfwoaAmHg8u7NgBiPjVXqPTIXH6dGjCwnq4Muoqhg8FtZZvv0Xj0aN+tw9PTIRx9Gj2ekIAw4eCloigrqAAnqYmv5eJv+8+3rcnRDB8KGi1u1xwFRf73V7Xp89313Fp+d86FPC3REFJRFC/dy9aqqv9XsY0bhwMSUk9WBV1J4YPBSXF7UbdZ5/d/KkUV9Ho9egzaRLAXk/I4G+Kgo6IoLmiAi3ffuv3MtFDhyJm2DAeaA4hAYVPbm4uxo8fj9jYWCQlJeHhhx9G+ffuo9vS0oLs7Gz07dsXMTExmDVrFmpqanzaVFZWYubMmYiKikJSUhKeffZZtPt7pTKFLBFBe3s73G43mpubcfnyZbS2tkK+P4yuKKjbvh1Kc7N/K9ZoYBo3DmG8TWpICeheA0VFRcjOzsb48ePR3t6OZcuWYerUqTh27Biir1zA99vf/haffvopNmzYAJPJhEWLFuHRRx/Frl27AAAejwczZ86ExWLB7t27ce7cOfzyl7+EXq/Hyy+/3P17SKoREXg8HtTX1+PYsWMoKSnBsWPHcObMGdTW1qKxsRG/+MUv8Pzzz/v0WNouXYKrpMTv7YQnJiL+nnt6YA+oJ2nkmj87/jt//jySkpJQVFSEH//4x3A6nUhMTMSHH36Ixx57DABw4sQJDBs2DHa7HRkZGcjLy8MDDzyA6upqmM1mAMDatWvx+9//HufPn0d4ePgtt+tyuWAymeB0OmE0GjtbPvUQEUFDQwOKi4uxadMmFBQU4OzZswCAmJgYxMfHIzExEbGxsXjwwQcxb948aK8cqxER1G7ejG/fe8/vEwstjz2G/nPn8itXEAjks9mluyw5r9xHNz4+HgBQXFyMtrY22Gw2b5uhQ4ciJSXFGz52ux0jRozwBg8AZGZmYuHChSgrK8OYMWOu2Y7b7Ybb7fbZQQo+IoLGxkZs374db7/9Nvbu3YuIiAiMHDkSc+fOxbhx43DHHXcgISEBer0eGo0GYWFhPqHhaWxEXX6+/2c0h4cj7u67GTwhqNPhoygKlixZgkmTJmH48OEAAIfDgfDwcMTFxfm0NZvNcDgc3jZXB0/H/I5515Obm4uVK1d2tlTqYSICRVGwb98+5ObmorCwEP369cNvfvMbPPbYYxg6dKi3R3urkGgsK0NLZaXf2zaOHo2o1NQu1U/q6HT4ZGdn4+jRo/jyyy+7s57rysnJwdKlS73vXS4XkpOTe3y7dGsdvZ21a9fiz3/+MwDgN7/5DZ566ikMHDgQGo3G716JeDyoKygIaPt9p0zh8HqI6lT4LFq0CFu2bMHOnTsxYMAA73SLxYLW1lbU19f79H5qampguXJvFYvFgn379vmsr2M0zHKD+68YDAYYDIbOlEo9SERQXV2NnJwcbNy4EXfffTdWrlyJ8ePHQ9eJ+yZfPn0aDQEcaI4YMACxw4fzK1eICuhPhohg0aJF2LRpEwoLC5H6ve5ueno69Ho9Cq7661VeXo7KykpYr9xfxWq1orS0FLW1td42+fn5MBqNSEtL68q+UC8SEXzzzTd48sknsXnzZmRnZ+PDDz9ERkZGp4JHRFBvt0O56tjeTWk0SJg6lc/jCmEB/S/p+A/2ySefIDY21nuMxmQyITIyEiaTCfPnz8fSpUsRHx8Po9GIZ555BlarFRkZGQCAqVOnIi0tDXPnzsWqVavgcDiwfPlyZGdns3cTIkQEp0+fxoIFC3DkyBG8/PLLePLJJxHRhQs62y5cwMWiIr/b6/v0QZ9Jk9jrCWEBhc+aNWsAAPfdd5/P9Pfffx/z5s0DALz22mvQarWYNWsW3G43MjMzsXr1am/bsLAwbNmyBQsXLoTVakV0dDSysrLw4osvdm1PqFeICOrq6rB48WIcPnwYq1atwty5cxHWhfvniAgu7dqF1qt6w7cSO3o09FdGWSk0dek8H7XwPB/1tLS04Nlnn8W6deuwcuVKLFq0CDqdrks9EE9LC75+6SU0HDniV3ttRAQG/+u/8nKKIBTIZ5PDBOQ3EcGGDRuwbt06zJkzBwsWLOhy8IgImk6cQOOJE34vE3HbbYhKTWXwhDg+ypH89vXXX+Oll15CWloali9fjqioqC6v0/sU0tZW/xbQapGQmckbhv0AMHzIL21tbXjjjTdQW1uL1157Df276fnnbRcuoKG01O/2hn79EJeRwV7PDwC/dpFfSkpKsH79ejz66KOYMmVKt3z4RQQXi4rgaWjwexlTejp0JlOXt03qY/jQLbW2tuLdd9+FRqPBokWLuu2UCKW5+bsnU/hJGxWFvjYbez0/EAwfuqXTp09jy5YtmD59OkaMGNEt6xQRuA4fDmh4PW78eERedUY9hTaGD92UiGDr1q1oamrCnDlzunQ+jw+PB+fz8iD+3kROo0Hfn/4Umk6cPU3BieFDN9Xc3Iy8vDzccccdmDBhQrd95Wk8eRKNx4753T7q9tsRdeedfrfvuNI+BE9j+4fB8KGbqq6uxuHDh2Gz2RAbG9st6xQR1O/e7f/wOoC+99+PsACG9kUEBQUFaGlp6UyJ1AsYPnRDIoJDhw7B7XZjUjdeR9VaU4NLV26r6w9dXByMo0YFtP26ujr88Y9/REVFRWdKpF7A8KEbEhEcOXIEERER+NGPftRt4eM6cgRtFy743T7OaoWhX7+AtrFr1y7s27cP27dv51evIMXwoRtqb29HeXk5kpKSrrn7ZGd5Ll/Ghf/9X7/bh8XEIMFmAwIIvra2Nvz3f/833G438vLy0BTA45ap9zB86IYURUFlZSUGDRoEvV7fLetsKC1F08mTAK5cze5240BdHU65XFCu00OJHDgQkYMGBdTr+vrrr1F05fYcBw4cwNdff90ttVP3YvjQDSmKgosXL8JisXTbELuzuBi4MgpV2dSExfv2IXvPHizYvRvrKyrguSqANDodEqdNC2h4XUSQl5fnvVmdy+XCZ599BkVRuqV+6j4MH7opj8cDo9HY7WcVC4B/Ky3Fsfp6eETgamvDX48fx9FLl7xtdHFxMI4ZE9C2XS4XPv74Y+9xHkVRsHXrVly+fLlb66euY/jQLXU85qa7udrafN63KgrcVz2bve9PfhLQbVJFBIcPH8aRI0cwePBghIWFITExEWVlZThx4gQPPAcZhg/d0nUfadxJHY801gD4icUC3VWhdqfRiIFXwiYsNva7M5oDeDKFx+NBfn4+/vmf/xmvvPIKdDodli1bhkceeQSff/45wyfI8Fx1uimtVguXy9VtH9y+P/kJ6vLz4WlsRNbgwYjV6/G/586hX2QknrrzTiRduU9Pn8mTER7gCFtbWxvuv/9+TJgwAZ9//jk8Hg/uvPNOPPnkk9i9ezcURfE+GZXUx/ChG9Jqtejbty8cDgc8Hk+nnkrxfREpKeg3ezbOrlsHXWsr/t+gQXhs0CB09H80Gg1i0tLQ7/HHoQ1wexEREbjnyjPbq6urodVqkZSUhOjoaNx///1drp26F8OHbkir1SIlJQXl5eVoa2vrlltpaLRaJE6bBgCo+Z//QdulS9Bc6VVp9HoYx4zBgPnzEd6Jm8N3HJcSEZw6dQomkwl9+vThLTiCFMOHbkin02Ho0KH44osv4HA4MHjw4G5Zr1avR9LMmTCNHQvXoUNwOxzQRkYiZtgwxAwb5j0u1FkejwdlZWUYMGAATLzxWNBi+NANaTQajBw5Ei0tLTh27BjuuOOObutFaLRaRNx2GyJuu61b1ne18+fP49SpUxg7dizDJ4jx6BvdkEajwZgxY2AwGLBr166QGS06deoUKisrcc8993Tf/Yeo2zF86Kb69euHMWPGoLCwEI2NjWqXc0uKomDbtm2IjIzEpEmT1C6HboLhQzcVGRmJGTNm4KuvvsL+/fuDvvdz8eJFbNu2Denp6d12jIp6BsOHbkqj0WD69OmIjo7Gf/zHf8Bz1RnIwUZEsHPnTpw8eRKPP/54l54dTz2P4UO3NHDgQDzwwAPIy8tDaQDP2OptjY2NePfddzFw4EBMuzKcT8GL4UO3pNfrMX/+fCiKgjfffBNut1vtkq4hIti2bRt27dqFrKwsWCwWnt8T5Bg+5JfRo0fj5z//OTZu3IiCgoKgOvYjInA4HPjTn/6E1NRU/NM//RODJwQwfMgver0eixcvhsViwcqVK1FdXa12SV5tbW14/fXXceLECfzud79jrydEMHzIb7fffjuWL1+OEydO4KWXXsLly5dV7wGJCP7+97/jnXfewUMPPYRHHnmEF4+GCP6WyG8ajQaPPfYY5s2bh//8z//EmjVr0N7erloAiQj279+P3//+9xg0aBBeeOEFRAXweB1SFy+voIAYDAYsX74cFRUVePnll2E0GjFv3jzodLpe/arT8WSNBQsWoL29HX/5y19w++238+tWCGHPhwKi0WiQkJCA119/HaNGjUJOTg7Wrl3bqyNgiqLAbrdj3rx5OH/+PN58801MnjyZwRNiGD4UMI1Gg0GDBuGdd97B2LFjsXz5cqxYsQJ1dXU9/hXM7Xbjo48+wty5c1FfX4/Vq1djxowZPM4Tgvgbo07RaDS444478P777+Ohhx7CW2+9hdmzZ2PPnj1ob2/v9u2JCL799lssW7YMCxYsQFxcHP793/8dDzzwAC8eDVUSgNWrV8uIESMkNjZWYmNjJSMjQ7Zu3eqd39zcLL/+9a8lPj5eoqOj5dFHHxWHw+GzjjNnzsiMGTMkMjJSEhMT5Xe/+520tbUFUoY4nU4BIE6nM6DlqPspiiINDQ3y6quvisViEYvFIsuWLZNvvvlGPB6PKIrSpXUriiKXLl2SdevWyejRoyU6Olp+8YtfyFdffdWldVPPCOSzGVD4bN68WT799FM5efKklJeXy7Jly0Sv18vRo0dFROTpp5+W5ORkKSgokAMHDkhGRobcfffd3uXb29tl+PDhYrPZ5NChQ7J161ZJSEiQnJycHttB6nmKokh7e7vY7XZ58MEHJTo6WoYMGSJ/+MMf5ODBg9Lc3Ox3EHUETltbm5w5c0befvttmTRpkkRFRcmPfvQjee+996ShoYHBE6R6LHyup0+fPvLuu+9KfX296PV62bBhg3fe8ePHBYDY7XYREdm6datotVqf3tCaNWvEaDSK2+2+4TZaWlrE6XR6X1VVVQyfIKQoirhcLtm4caNMmzZNTCaTJCYmyk9/+lP54x//KNu3b5dvvvlGnE6nXL58WVpaWqSlpUWam5ulsbFRqqurZc+ePfLOO+/InDlzJDU1VaKjoyUtLU1yc3OlqqqKoRPkAgmfTg+1ezwebNiwAU1NTbBarSguLkZbWxtsNpu3zdChQ5GSkgK73Y6MjAzY7XaMGDHC57nfmZmZWLhwIcrKyjBmzJjrbis3NxcrV67sbKnUSzQaDWJjY/Hwww/DZrOhuLgYGzduRGFhIVatWgURQUxMDOLj45GUlITY2FhoNBpcvnwZFy5cQF1dHVwuF9rb2xEXF4eJEyfiwQcfhM1mg9ls5kHlH5iAw6e0tBRWqxUtLS2IiYnBpk2bkJaWhpKSEoSHhyMuLs6nvdlshsPhAAA4HA6f4OmY3zHvRnJycrB06VLve5fLheTk5EBLp17SEUL33nsvJk+ejPr6ehw/fhwlJSU4fvw4Tp8+jfPnz+PcuXNQFAUGgwHx8fEYNmwY7rrrLowcORKjRo2C2WyGwWDgEPoPVMDhc9ddd6GkpAROpxMfffQRsrKyUFRU1BO1eRkMhm55cgL1Lo1GA51Oh4SEBNxzzz2YPHkyPB4PFEWBcuV57R3tNBoNtFotwsLCvO/phy3g8AkPD/feIS49PR379+/H66+/jscffxytra2or6/36f3U1NTAYrEAACwWC/bt2+ezvpqaGu88+mHrCCMioBvO81EUBW63G+np6dDr9SgoKPDOKy8vR2VlJaxWKwDAarWitLQUtbW13jb5+fkwGo1IS0vrailEFEIC+jOUk5OD6dOnIyUlBQ0NDfjwww/x+eef47PPPoPJZML8+fOxdOlSxMfHw2g04plnnoHVakVGRgYAYOrUqUhLS8PcuXOxatUqOBwOLF++HNnZ2fxaRfQPJqDwqa2txS9/+UucO3cOJpMJI0eOxGeffeZ9FO1rr70GrVaLWbNmwe12IzMzE6tXr/YuHxYWhi1btmDhwoWwWq2Ijo5GVlYWXnzxxe7dKyIKehqRILolnZ9cLhdMJhOcTieMRqPa5RDRFYF8NnniBBGpguFDRKpg+BCRKhg+RKQKhg8RqYLhQ0SqYPgQkSoYPkSkCoYPEamC4UNEqmD4EJEqGD5EpAqGDxGpguFDRKpg+BCRKhg+RKQKhg8RqYLhQ0SqYPgQkSoYPkSkCoYPEamC4UNEqmD4EJEqGD5EpAqGDxGpguFDRKpg+BCRKhg+RKQKhg8RqYLhQ0SqYPgQkSoYPkSkCoYPEamC4UNEqmD4EJEquhQ+r7zyCjQaDZYsWeKd1tLSguzsbPTt2xcxMTGYNWsWampqfJarrKzEzJkzERUVhaSkJDz77LNob2/vSilEFGI6HT779+/H22+/jZEjR/pM/+1vf4u///3v2LBhA4qKilBdXY1HH33UO9/j8WDmzJlobW3F7t27sW7dOnzwwQdYsWJF5/eCiEKPdEJDQ4MMGTJE8vPz5d5775XFixeLiEh9fb3o9XrZsGGDt+3x48cFgNjtdhER2bp1q2i1WnE4HN42a9asEaPRKG63+7rba2lpEafT6X1VVVUJAHE6nZ0pn4h6iNPp9Puz2ameT3Z2NmbOnAmbzeYzvbi4GG1tbT7Thw4dipSUFNjtdgCA3W7HiBEjYDabvW0yMzPhcrlQVlZ23e3l5ubCZDJ5X8nJyZ0pm4iCSMDhs379ehw8eBC5ubnXzHM4HAgPD0dcXJzPdLPZDIfD4W1zdfB0zO+Ydz05OTlwOp3eV1VVVaBlE1GQ0QXSuKqqCosXL0Z+fj4iIiJ6qqZrGAwGGAyGXtseEfW8gHo+xcXFqK2txdixY6HT6aDT6VBUVIQ33ngDOp0OZrMZra2tqK+v91mupqYGFosFAGCxWK4Z/ep439GGiH74AgqfKVOmoLS0FCUlJd7XuHHjMGfOHO+/9Xo9CgoKvMuUl5ejsrISVqsVAGC1WlFaWora2lpvm/z8fBiNRqSlpXXTbhFRsAvoa1dsbCyGDx/uMy06Ohp9+/b1Tp8/fz6WLl2K+Ph4GI1GPPPMM7BarcjIyAAATJ06FWlpaZg7dy5WrVoFh8OB5cuXIzs7m1+tiP6BBBQ+/njttdeg1Woxa9YsuN1uZGZmYvXq1d75YWFh2LJlCxYuXAir1Yro6GhkZWXhxRdf7O5SiCiIaURE1C4iUC6XCyaTCU6nE0ajUe1yiOiKQD6bvLaLiFTB8CEiVTB8iEgVDB8iUgXDh4hUwfAhIlUwfIhIFQwfIlIFw4eIVMHwISJVMHyISBUMHyJSBcOHiFTB8CEiVTB8iEgVDB8iUgXDh4hUwfAhIlUwfIhIFQwfIlIFw4eIVMHwISJVMHyISBUMHyJSBcOHiFTB8CEiVTB8iEgVDB8iUgXDh4hUwfAhIlUwfIhIFQwfIlIFw4eIVMHwISJVMHyISBUMHyJSBcOHiFShU7uAzhARAIDL5VK5EiK6WsdnsuMzejMhGT4XLlwAACQnJ6tcCRFdT0NDA0wm003bhGT4xMfHAwAqKytvuYPBxuVyITk5GVVVVTAajWqX4zfW3btCtW4RQUNDA/r373/LtiEZPlrtd4eqTCZTSP1irmY0GkOydtbdu0Kxbn87BDzgTESqYPgQkSpCMnwMBgNeeOEFGAwGtUsJWKjWzrp7V6jWHQiN+DMmRkTUzUKy50NEoY/hQ0SqYPgQkSoYPkSkCoYPEakiJMPnrbfewqBBgxAREYGJEydi3759qtazc+dO/OxnP0P//v2h0Wjw8ccf+8wXEaxYsQL9+vVDZGQkbDYbTp065dPm4sWLmDNnDoxGI+Li4jB//nw0Njb2aN25ubkYP348YmNjkZSUhIcffhjl5eU+bVpaWpCdnY2+ffsiJiYGs2bNQk1NjU+byspKzJw5E1FRUUhKSsKzzz6L9vb2Hqt7zZo1GDlypPfsX6vViry8vKCu+XpeeeUVaDQaLFmyJORq7xYSYtavXy/h4eHy3nvvSVlZmTz11FMSFxcnNTU1qtW0detW+cMf/iAbN24UALJp0yaf+a+88oqYTCb5+OOP5fDhw/Lggw9KamqqNDc3e9tMmzZNRo0aJXv27JEvvvhCBg8eLLNnz+7RujMzM+X999+Xo0ePSklJicyYMUNSUlKksbHR2+bpp5+W5ORkKSgokAMHDkhGRobcfffd3vnt7e0yfPhwsdlscujQIdm6daskJCRITk5Oj9W9efNm+fTTT+XkyZNSXl4uy5YtE71eL0ePHg3amr9v3759MmjQIBk5cqQsXrzYOz0Uau8uIRc+EyZMkOzsbO97j8cj/fv3l9zcXBWr+j/fDx9FUcRiscirr77qnVZfXy8Gg0H+9re/iYjIsWPHBIDs37/f2yYvL080Go2cPXu212qvra0VAFJUVOStU6/Xy4YNG7xtjh8/LgDEbreLyHfBq9VqxeFweNusWbNGjEajuN3uXqu9T58+8u6774ZEzQ0NDTJkyBDJz8+Xe++91xs+oVB7dwqpr12tra0oLi6GzWbzTtNqtbDZbLDb7SpWdmMVFRVwOBw+NZtMJkycONFbs91uR1xcHMaNG+dtY7PZoNVqsXfv3l6r1el0Avi/uwYUFxejra3Np/ahQ4ciJSXFp/YRI0bAbDZ722RmZsLlcqGsrKzHa/Z4PFi/fj2amppgtVpDoubs7GzMnDnTp0YgNH7e3Smkrmqvq6uDx+Px+cEDgNlsxokTJ1Sq6uYcDgcAXLfmjnkOhwNJSUk+83U6HeLj471tepqiKFiyZAkmTZqE4cOHe+sKDw9HXFzcTWu/3r51zOsppaWlsFqtaGlpQUxMDDZt2oS0tDSUlJQEbc0AsH79ehw8eBD79++/Zl4w/7x7QkiFD/Wc7OxsHD16FF9++aXapfjlrrvuQklJCZxOJz766CNkZWWhqKhI7bJuqqqqCosXL0Z+fj4iIiLULkd1IfW1KyEhAWFhYdcc/a+pqYHFYlGpqpvrqOtmNVssFtTW1vrMb29vx8WLF3tlvxYtWoQtW7Zgx44dGDBggHe6xWJBa2sr6uvrb1r79fatY15PCQ8Px+DBg5Geno7c3FyMGjUKr7/+elDXXFxcjNraWowdOxY6nQ46nQ5FRUV44403oNPpYDabg7b2nhBS4RMeHo709HQUFBR4pymKgoKCAlitVhUru7HU1FRYLBafml0uF/bu3eut2Wq1or6+HsXFxd42hYWFUBQFEydO7LHaRASLFi3Cpk2bUFhYiNTUVJ/56enp0Ov1PrWXl5ejsrLSp/bS0lKf8MzPz4fRaERaWlqP1f59iqLA7XYHdc1TpkxBaWkpSkpKvK9x48Zhzpw53n8Ha+09Qu0j3oFav369GAwG+eCDD+TYsWPyq1/9SuLi4nyO/ve2hoYGOXTokBw6dEgAyJ///Gc5dOiQnDlzRkS+G2qPi4uTTz75RI4cOSIPPfTQdYfax4wZI3v37pUvv/xShgwZ0uND7QsXLhSTySSff/65nDt3zvu6fPmyt83TTz8tKSkpUlhYKAcOHBCr1SpWq9U7v2Pod+rUqVJSUiLbtm2TxMTEHh36fe6556SoqEgqKirkyJEj8txzz4lGo5Ht27cHbc03cvVoV6jV3lUhFz4iIm+++aakpKRIeHi4TJgwQfbs2aNqPTt27BAA17yysrJE5Lvh9ueff17MZrMYDAaZMmWKlJeX+6zjwoULMnv2bImJiRGj0ShPPPGENDQ09Gjd16sZgLz//vveNs3NzfLrX/9a+vTpI1FRUfLII4/IuXPnfNZz+vRpmT59ukRGRkpCQoL8y7/8i7S1tfVY3U8++aQMHDhQwsPDJTExUaZMmeINnmCt+Ua+Hz6hVHtX8X4+RKSKkDrmQ0Q/HAwfIlIFw4eIVMHwISJVMHyISBUMHyJSBcOHiFTB8CEiVTB8iEgVDB8iUgXDh4hU8f8BIwjvI+pr1K8AAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 300x300 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "110.70252609128389"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "play(True)[-1]"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "collapsed_sections": [],
   "name": "第7章-DQN算法.ipynb",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python [conda env:pt39]",
   "language": "python",
   "name": "conda-env-pt39-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
